library(ggplot2)
library(dplyr)
library(reshape)
# Load the happiness data set without changing the session's working
# directory (setwd() would silently affect everything run afterwards).
# The directory defaults to the original author's location; set the
# HAPPINESS_DATA_DIR environment variable to run the script elsewhere.
data_dir <- Sys.getenv("HAPPINESS_DATA_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "/Users/kendraosburn/syracuse/719"
}
happiness <- read.csv(
  file.path(data_dir, "happiness_project.csv"),
  sep = ",",
  header = TRUE
)
### ATTEMPT ONE -- this drops region
## Keep only the columns relevant to the score comparison
just_score <- select(happiness, country, region, happiness_score)

## Average score per country, sorted from highest to lowest.
## summarize() keeps only the grouping column and the new average, which is
## why region is lost in this attempt.
just_score_average <- just_score %>%
  group_by(country) %>%
  summarize(avg_score = mean(happiness_score)) %>%
  arrange(desc(avg_score))

## Mean of the per-country averages
## TODO: Get all above average and below average
average_score <- just_score_average %>%
  summarize(average_score = mean(avg_score))

## 20 happiest and 20 least happy countries.
## `wt` is named explicitly so the ranking does not depend on avg_score
## happening to be the last column (top_n() otherwise guesses the column and
## prints a "Selecting by ..." message). Ties at the cut-off are kept, so more
## than 20 rows can come back.
happiest <- just_score_average %>% top_n(20, wt = avg_score)
least_happy <- just_score_average %>% top_n(-20, wt = avg_score)

### ATTEMPT 2 -- This keeps region and is cleaner
# Add a column called avg holding each country's mean happiness score.
# mutate() (rather than summarize()) keeps every original column, so region
# survives.
test <- happiness %>%
  group_by(country) %>%
  mutate(avg = mean(happiness_score)) %>%
  ungroup()

# Only the columns I care about
test_sm <- select(test, country, region, avg)

# avg is constant within a country, so the only rows that need to go are the
# ones whose average could not be computed (NA). Testing for NA directly
# avoids comparing floating-point values with `==`.
test2 <- test_sm %>% filter(!is.na(avg))

# Every observation of a country now carries identical values; collapse the
# duplicates to one row per country/region pair.
unique_test <- distinct(test2)

# Highest average first
unique_test2 <- unique_test %>%
  arrange(desc(avg))

# Take the first and last 20 rows by position in the sorted table instead of
# hard-coded row numbers, so the selection is always exactly 20 (or fewer when
# the table is shorter) regardless of how many countries are present.
top <- head(unique_test2, 20)
bottom <- tail(unique_test2, 20)

# Top 20: bars ordered from highest to lowest average. The y axis is zoomed
# with coord_cartesian() so the differences between bars stay visible.
ggplot(top, aes(x = reorder(country, -avg), y = avg, fill = region)) +
  geom_bar(stat = "identity") +
  coord_cartesian(ylim = c(6.75, 7.65)) +
  theme(axis.text.x = element_text(angle = 90))

# Bottom 20: bars ordered from lowest to highest average.
ggplot(bottom, aes(x = reorder(country, avg), y = avg, fill = region)) +
  geom_bar(stat = "identity") +
  coord_cartesian(ylim = c(2, 4.5)) +
  theme(axis.text.x = element_text(angle = 90))

# Horizontal boxplots of one column, split by another column.
#
# dat: data frame containing both columns.
# x:   name (string) of the grouping column.
# y:   name (string) of the column whose distribution is drawn.
# Returns the ggplot object; nothing is drawn until it is printed.
FUN <- function(dat, x, y) {
  # .data[[name]] looks a column up by its string name and replaces the
  # deprecated aes_string(). labs() pins the axis titles to the column names.
  ggplot(dat, aes(x = .data[[x]], y = .data[[y]])) +
    geom_boxplot() +
    labs(x = x, y = y) +
    coord_flip()
}

# One horizontal boxplot per column in positions 6 to 13 of `happiness`,
# always split by region. print() is needed because ggplot objects are not
# drawn automatically inside a loop.
for (column in names(happiness)[6:13]) {
  print(FUN(dat = happiness, x = "region", y = column))
}

# Vertical boxplots of one column, split by another column, with the x axis
# labels rotated 90 degrees.
#
# dat: data frame containing both columns.
# x:   name (string) of the grouping column.
# y:   name (string) of the column whose distribution is drawn.
# z:   unused; kept so existing calls keep working.
# Returns the ggplot object; nothing is drawn until it is printed.
FUN2 <- function(dat, x, y, z) {
  # .data[[name]] looks a column up by its string name and replaces the
  # deprecated aes_string(). labs() pins the axis titles to the column names.
  ggplot(dat, aes(x = .data[[x]], y = .data[[y]])) +
    geom_boxplot() +
    labs(x = x, y = y) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
}

# One vertical boxplot per column in positions 6 to 13 of `happiness`,
# always split by region. print() is needed because ggplot objects are not
# drawn automatically inside a loop.
for (column in names(happiness)[6:13]) {
  print(FUN2(dat = happiness, x = "region", y = column))
}

# Redefinition of FUN2: vertical boxplots coloured by the grouping column,
# with the x axis labels hidden (the fill legend identifies the groups).
#
# dat: data frame containing both columns.
# x:   name (string) of the grouping column; also used for the fill colour.
# y:   name (string) of the column whose distribution is drawn.
# z:   unused; kept so existing calls keep working.
# Returns the ggplot object; nothing is drawn until it is printed.
FUN2 <- function(dat, x, y, z) {
  # .data[[name]] looks a column up by its string name and replaces the
  # deprecated aes_string(). labs() pins the axis and legend titles to the
  # column names.
  ggplot(dat, aes(x = .data[[x]], y = .data[[y]], fill = .data[[x]])) +
    geom_boxplot() +
    labs(x = x, y = y, fill = x) +
    theme(axis.text.x = element_blank())
}

# One region-coloured boxplot per column in positions 6 to 13 of `happiness`,
# using the FUN2 definition directly above. print() is needed because ggplot
# objects are not drawn automatically inside a loop.
for (column in names(happiness)[6:13]) {
  print(FUN2(dat = happiness, x = "region", y = column))
}

# Histograms of happiness_score, first one panel per year, then one panel per
# region.

# --- By year: three panels side by side -----------------------------------
happiness_sm <- select(happiness, year, happiness_score)
# NOTE(review): both selected columns are declared as id variables, so melt()
# has nothing to stack here and looks like a pass-through -- confirm and
# consider dropping it. `id.vars` is spelled out instead of relying on
# partial argument matching of `id`.
happiness_melt <- melt(happiness_sm, id.vars = c("year", "happiness_score"))

# Remember the graphics settings so they can be put back afterwards.
old_par <- par(mfrow = c(1, 3))

years <- unique(happiness_melt$year)
# The loop variable is deliberately not called `year`, so no global object
# shadowing the column name is left behind.
for (yr in years) {
  hist(happiness_melt$happiness_score[happiness_melt$year == yr],
       xlim = c(3, 8), main = paste(yr), xlab = "Score", ylab = "Frequency")
}

# --- By region: 3 x 3 grid of panels --------------------------------------
par(mfrow = c(3, 3))
happiness_sm <- select(happiness, region, happiness_score)
happiness_melt <- melt(happiness_sm, id.vars = c("region", "happiness_score"))
regions <- unique(happiness_melt$region)
for (reg in regions) {
  hist(happiness_melt$happiness_score[happiness_melt$region == reg],
       xlim = c(3, 8), main = paste(reg), xlab = "Score", ylab = "Countries")
}

# Restore the panel layout that was active before this section.
par(old_par)

### ATTEMPT 2, generalised -- the same per-country averaging as above (region
### is kept), wrapped in functions so it can be repeated for any column
# Bar chart of the 20 countries with the highest per-country mean of a column.
#
# x: name (string) of a column of the global `happiness` data frame; it is
#    also used as the plot title.
# Returns the ggplot object; nothing is drawn until it is printed.
FUN3 <- function(x) {
  # .data[[x]] looks the column up by name, so no string is parsed and
  # evaluated as code. avg is constant within a country, so after dropping
  # countries whose mean is NA, distinct() leaves one row per country/region
  # pair.
  country_avg <- happiness %>%
    group_by(country) %>%
    mutate(avg = mean(.data[[x]])) %>%
    ungroup() %>%
    select(country, region, avg) %>%
    filter(!is.na(avg)) %>%
    distinct() %>%
    arrange(desc(avg))

  # head() returns fewer rows when fewer than 20 countries remain; fixed index
  # ranges would produce rows of NA instead.
  top <- head(country_avg, 20)

  ggplot(top, aes(x = reorder(country, -avg), y = avg, fill = region)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(title = x, subtitle = "Top 20")
}

# Bar chart of the 20 countries with the lowest per-country mean of a column.
#
# x: name (string) of a column of the global `happiness` data frame; it is
#    also used as the plot title.
# Returns the ggplot object; nothing is drawn until it is printed.
FUN4 <- function(x) {
  # .data[[x]] looks the column up by name, so no string is parsed and
  # evaluated as code. avg is constant within a country, so after dropping
  # countries whose mean is NA, distinct() leaves one row per country/region
  # pair.
  country_avg <- happiness %>%
    group_by(country) %>%
    mutate(avg = mean(.data[[x]])) %>%
    ungroup() %>%
    select(country, region, avg) %>%
    filter(!is.na(avg)) %>%
    distinct() %>%
    arrange(desc(avg))

  # The last 20 rows of the descending table are the 20 lowest averages.
  # tail() keeps this correct for any number of countries and always selects
  # 20 rows, matching the "Bottom 20" subtitle.
  bottom <- tail(country_avg, 20)

  ggplot(bottom, aes(x = reorder(country, avg), y = avg, fill = region)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = 90)) +
    labs(title = x, subtitle = "Bottom 20")
}
# For each column in positions 6 to 13 of `happiness`, draw the top-20 chart
# followed by the bottom-20 chart. print() is needed because ggplot objects
# are not drawn automatically inside a loop.
columns <- names(happiness)[6:13]
for (column in columns) {
  print(FUN3(x = column))
  print(FUN4(x = column))
}

# FIN.